#Loading some libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mpld3
import seaborn as sns
from scipy.interpolate import interp1d
%matplotlib inline
import scipy.stats
import os
user=os.getenv('USERNAME')
plt.style.use('ggplot')
# Loading dataframe containging technical data and power curve:
# - power curve were donwloaded from the renewable ninja github (https://github.com/renewables-ninja)
# - technical data come from thewindpower (https://www.thewindpower.net/)
df = pd.read_pickle(r'C:\Users\romain\Desktop\Power_Curve_Project/dataframe.pkl')
#df = df.dropna()
df_TD = df.loc['technical_data']
df_TDt = df_TD.T
df_PCn = df.loc['power_curve']
df_TDt.head()
# Loading of power curves from Renewable.Ninja
df_pc = pd.read_csv(r'C:\Users\romain\Desktop\Power_Curve_Project/power_curves.txt', header = 0, index_col = 0)
df_pc1 = pd.read_csv(r'C:\Users\romain\Desktop\Power_Curve_Project/power_curves_01.txt', header = 0, index_col = 0)
df_pc2 = pd.read_csv(r'C:\Users\romain\Desktop\Power_Curve_Project/power_curves_02.txt', header = 0, index_col = 0)
df_pc3 = pd.read_csv(r'C:\Users\romain\Desktop\Power_Curve_Project/power_curves_03.txt', header = 0, index_col = 0)
df_pc4 = pd.read_csv(r'C:\Users\romain\Desktop\Power_Curve_Project/power_curves_04.txt', header = 0, index_col = 0)
df_PCj = pd.concat([df_pc, df_pc1, df_pc2, df_pc3, df_pc4], axis = 1, keys = ['pc', 'pc1', 'pc2', 'pc3', 'pc4'])
df_PCj = df_PCj.swaplevel(axis = 1)
#Observation of normalized power curve
fig, ax = plt.subplots(figsize = (16,9))
df_PCn.iloc[::10, :].plot(ax =ax, alpha = 0.5, legend = False,)
plt.title('Normalized wind turbine power curves')
plt.ylabel('Power (kW/$kW_{nom}$)')
plt.xlabel('Windspeed (m/s)')
Some turbines reach their nominal power at lower wind speed than others.
As demonstrated below, it is determined by their rotor size.
#Observation of power curves in absolute power
df_PC = df_PCn * df_TDt['Rated power']
fig, ax = plt.subplots(figsize = (16,9))
df_PC.iloc[::10, :].plot(ax =ax, alpha = 0.5, legend = False,)
plt.title('Wind turbine power curves')
plt.ylabel('Power (kW)')
plt.xlabel('Windspeed (m/s)')
Production increase with the cubic windspeed before saturating to their nominal power.
#Observation of power curve normalized by swept area
df_PCs = df_PC / df_TDt['Swept area']
fig, ax = plt.subplots(figsize = (16,9))
df_PCs.iloc[::10, :].plot(ax =ax, alpha = 0.5, legend =False)
plt.title('Power curves divided by swept area')
plt.ylabel('Power (kW) / S (m²)')
plt.xlabel('Windspeed (m/s)')
When normalised by swept area, power curves become similar.
#Observation of wind power coefficient (Cp) as a function of wind speed
rho = 1.225
df_Cp = (2*df_PCs[1:]*1e3).div(rho * df_PCs[1:].index.values**3, axis = 0)
fig, ax = plt.subplots(figsize = (16,9))
df_Cp.iloc[::10, :].plot(ax = ax, alpha = 0.5, legend = False)
plt.title('Wind turbine power coefficient : $C_p$')
plt.ylabel('$C_p$')
plt.xlabel('Windspeed (m/s)')
The Cp reach its maximum after the cut-in wind speed and decreased at windspeed for which the production saturate.
#Observation of the distribution of maximum Cp value
fig, ax = plt.subplots()
plt.title('Distribution of maxium Cp values')
plt.ylabel('Occurence')
plt.xlabel('Cp values')
display(df_Cp.max().describe())
df_Cp.max().hist()
#Observation of power curves normalized by swept area and compared to median Cp value of 0.44
df_PCs = df_PC / df_TDt['Swept area']
fig, ax = plt.subplots(figsize = (16,9))
df_PCs.iloc[::10, :].plot(ax =ax, alpha = 0.1, color = 'grey', lw = 2, legend = False)
plt.title('Power curves')
plt.title('Power curves divided by swept area')
plt.ylabel('Power (kW) / S (m²)')
v = np.arange(0, 15, 0.1)
plt.plot(v, 1/2*1.225*0.44*v**3/1e3)
Power curves are correctly modelled untill the production saturate.
The power will be limited to the nominal power of the wind turbine and smooth by a gaussian filter with a width corresponding the turbulence intensity.
# Model to generate power curve from nominale power and rotor dimension
#Fonction générant la power curve
def PowerCurveGenerator(P, d, TI = 0.10, cut_in = 3.5, cut_out = 25, Cp = 0.44, normalized = False):
"""
This function generate wind turbine power curve from nominal power and rotor dimension and turbulence intensity used to smooth the power curve (10% by default).
List of parameters : P (rated power) expressed in kW, d (rotor diameter) expressed in m.
Cut-in wind speed and cut-out wind speed can be adjusted, by default values are 3.5 m/s and 25 m/s.
Cp value is set at 0.44 corresponding to the mean value of a set of wind turbine model.
"""
#Convertion from kW to W
P = P*1e3
#Physical parameters
rho = 1.225 #kg/m3, air density, could be calculated based on temperature and altitude
S = np.pi * (d/2)**2 #m², rotor area
#Calculation parameters
a = 5 #m/s, Truncature width of the Gaussian filter for turbulence intensity
speed_step = 0.1 #Step for wind power curve
#Power calculation (P proportional to wind_speed^3)
df_bpc = pd.DataFrame(index = np.arange(0, 40 + speed_step, speed_step), columns = ['wind_speed'])
df_bpc['wind_speed'] = df_bpc.index
df_bpc['P'] = 1/2 * rho * S * Cp * df_bpc.wind_speed**3
#Saturation of the power output to the nominal value
df_bpc['P'][ df_bpc['P'] > P ] = P
#Gaussian filter over w*(1-TI):w*(1+TI), TI being the turbulence intensity
df_bpc['P_with_TI'] = np.nan
df_bpc.iloc[ ::int(1/speed_step), df_bpc.columns.get_loc('P_with_TI')] = [ df_bpc['P'].rolling(window = int(a*TI*w/speed_step), win_type='gaussian', center = True).mean(std = int(TI*w/speed_step)).loc[w] for w in np.arange(0,41)]
df_bpc['P_with_TI'].interpolate(method='cubic', inplace = True)
df_bpc['P_with_TI'][df_bpc['P_with_TI'] < 0] = 0
df_bpc['P_with_TI'][df_bpc['wind_speed'] < (1-2*TI)*cut_in] = 0
#Cut-in wind speed and cutout wind speed
df_bpc['P'][ df_bpc['wind_speed'] < cut_in] = 0
df_bpc['P'][ df_bpc['wind_speed'] > cut_out] = 0
df_bpc['P_with_TI'][ df_bpc['wind_speed'] > cut_out] = 0
del df_bpc['wind_speed']
df_bpc['P'] = df_bpc['P']/1e3
df_bpc['P_with_TI'] = df_bpc['P_with_TI']/1e3
df_bpc = df_bpc.replace(np.nan, 0)
df_bpc.index.name = 'wind speed'
if normalized:
df_bpc = df_bpc/(P*1e-3)
return df_bpc
fig, ax = plt.subplots(figsize=(9, 6))
df_bpc = PowerCurveGenerator(2000, 90, normalized= False, cut_in = 4.5)
df_bpc.columns = [['Modeled PC of V90', 'Smoothed Modelled PC of V90']]
df_bpc.plot(ax = ax, alpha = 2, lw = 1)
df_PC[['Vestas.V90.2000']].plot(ax = ax, lw = 5, alpha = 0.5)
#df_pc[['Bonus.B23.150']].plot(ax = ax)
plt.xlim([0, 30])
plt.xlabel('Windspeed (m/s)')
plt.ylabel('Power (kW/$kW_p$)')
plt.title('Manufacturer\'s and modeled power curve of a Vestas V90')
#Loading the ENGIE data (https://opendata-renewables.engie.com/explore/dataset/la-haute-borne-data-2013-2016/table/)
name = 'ENGIE'
df_SARAH = pd.read_pickle(r'C:\Users\romain\Desktop\Power_Curve_Project\NinjaData_%s_SarahSolar.pkl'%name)
df_WIND = pd.read_pickle(r'C:\Users\romain\Desktop\Power_Curve_Project\NinjaData_%s_MerraWind.pkl'%name)
df_engie = pd.read_pickle(r'C:\Users\romain\Desktop\Power_Curve_Project\la-haute-borne-data-2013-2016.pkl')
# Selection des variables d'intérêt et calcul de l'intensité de turbulence
df_NG = df_engie[['P_avg', 'Ws_avg','Ws_std']]
df_NG['TI'] = df_NG.Ws_std/df_NG.Ws_avg
# Application sur la turbine REpower MM82 2000
turbine = 'REpower.MM82.2000'
TI = 0.10
df_bpc5 = PowerCurveGenerator(P = 2050, d = 82, TI = TI)
fig, ax = plt.subplots(figsize = (12,9))
df_bpc5.plot(ax = ax, ls = '--')
(df_PCj['REpower.MM82.2000']['pc'].iloc[::5]*2050).plot(ax = ax)
plt.title('Turbine : %s \n taux de turbulence = %s' %(turbine, TI))
plt.legend()
plt.xlim([0,15])
#mpld3.display(fig)
df_pc_TI = pd.DataFrame()
df_bpc5 = PowerCurveGenerator(P = 2050, d = 82, TI = 0.05)
df_pc_TI['5%'] = df_bpc5.P_with_TI
df_bpc5 = PowerCurveGenerator(P = 2050, d = 82, TI = 0.10)
df_pc_TI['10%'] = df_bpc5.P_with_TI
df_bpc5 = PowerCurveGenerator(P = 2050, d = 82, TI = 0.15)
df_pc_TI['15%'] = df_bpc5.P_with_TI
df_bpc5 = PowerCurveGenerator(P = 2050, d = 82, TI = 0.20)
df_pc_TI['20%'] = df_bpc5.P_with_TI
fig, ax = plt.subplots(figsize = (16,9))
#color = sns.light_palette("green", 8)
color = sns.color_palette()
df_NG[df_NG.TI < 0.05].plot(kind = 'scatter', x = 'Ws_avg', y = 'P_avg', ax = ax, alpha = 0.1, color = color[0])
df_NG[(df_NG.TI >= 0.05)&(df_NG.TI < 0.1)].plot(kind = 'scatter', x = 'Ws_avg', y = 'P_avg', ax = ax, alpha = 0.1, color = color[1])
df_NG[(df_NG.TI >= 0.10)&(df_NG.TI < 0.15)].plot(kind = 'scatter', x = 'Ws_avg', y = 'P_avg', ax = ax, alpha = 0.1, color = color[2])
df_NG[(df_NG.TI >= 0.15)&(df_NG.TI < 0.2)].plot(kind = 'scatter', x = 'Ws_avg', y = 'P_avg', ax = ax, alpha = 0.1, color = color[3])
df_NG[(df_NG.TI >= 0.20)].plot(kind = 'scatter', x = 'Ws_avg', y = 'P_avg', ax = ax, alpha = 0.1, color = color[4])
(df_PCj['REpower.MM82.2000'].pc.iloc[::5]*2050).plot(ax = ax)
df_pc_TI.plot(ax = ax, alpha = 2, lw = 2, ls = '--')
plt.title('Power curve observed and modelled for various turbulence intensity')
plt.ylabel('Power (kW)')
plt.xlabel('Windspeed (m/s)')
The higher the turbulence is, the smoother the power curves are. It justifies the width of the gaussian filter proportional to the wind speed.
# Observation of the ability of the modele for different wind turbines
# avec TI = 0.10
df_PC = df_PCn * df_TDt['Rated power']
#for manufacturer in set(df_TDt.Manufucturer):
for manufacturer in ['Vestas']:
list_WT = [wt for wt in df_PC.columns if manufacturer in str(wt) and '' in str(wt)]
TI = 0.10
color = sns.color_palette("Paired")
fig, ax = plt.subplots(figsize = (12,9))
for i, wt in enumerate(list_WT):
if wt!= 'Vestas.V164.7000':
P = df_TDt.loc[wt]['Rated power']
d = df_TDt.loc[wt]['Rotor diameter']
df_PC.iloc[::10,:][wt].plot(ax =ax, color = color[i%6])
df_bpc = PowerCurveGenerator(P = P, d = d, TI = TI)
df_bpc_p = df_bpc[['P_with_TI']]
df_bpc_p.columns = ['Model TI=0.10 %s'%wt]
df_bpc_p.plot(ax = ax, ls = '-.', color = color[i%6], legend = False)
df_bpc = PowerCurveGenerator(P = P, d = d, TI = 0.15)
df_bpc_p = df_bpc[['P_with_TI']]
df_bpc_p.columns = ['Model TI=0.15 %s'%wt]
df_bpc_p.plot(ax = ax, ls = '--', color = color[i%6], legend = False)
print(manufacturer)
plt.title('Manufacturers\' and modeled power curve \n %s wind turbines\n for turbulence intensity of 0.10 and 0.15'%manufacturer)
plt.ylabel('Power (kW)')
plt.xlabel('Wind speed (m/s)')
#plt.legend(bbox_to_anchor=(1.2, 0.2))
plt.xlim([0,20])
#Adding the MM82 2MW turbine to the dataframe
df_TD = df_TDt.T
df_TD['REpower.MM82.2000'] = np.nan
df_TD['REpower.MM82.2000'].loc['Rated power'] = 2050
df_TD['REpower.MM82.2000'].loc['Rotor diameter'] = 82
df_TD['REpower.MM82.2000'].loc['Swept area'] = np.pi * (df_TD['REpower.MM82.2000'].loc['Rotor diameter']/2)**2
df_TDt = df_TD.T
df_PCn = df.loc['power_curve']
df_PCn['REpower.MM82.2000'] = df_PCj['REpower.MM82.2000'].pc
df_PC = df_PCn * df_TDt['Rated power']
#Récupération des données moyenne sur les 4 turbines ou une seule
#df_NG2 = df_NG.resample('h').mean()
df_NG2 = df_NG.resample('h').first()
df_NG2.head()
#Average turbulence intensity
df_NG.replace(np.inf, np.nan).resample('h').first().resample('a').mean()
A turbulence intensity of 20% will be used.
#Manufacturer power curves
df_PCc = df_PC[0:-1:10]
df_PCc = pd.concat([df_PCc, df_PC[-1:]])
df_PCc = df_PCc.astype(float)
#Modeled power curves
df_PCm = pd.DataFrame(index = df_PCc.index, columns = df_PCc.columns)
for turbine in df_TDt.index:
#print(turbine)
pc = PowerCurveGenerator(df_TDt.loc[turbine]['Rated power'], df_TDt.loc[turbine]['Rotor diameter'], TI = 0.20, cut_in = 3.5, cut_out = 25)
df_PCm[turbine] = pc.P_with_TI.values
df_PCm
fig, ax = plt.subplots( figsize= (16,9))
df_PCm.plot(ax = ax, ls ='--', alpha = 0.4, legend = False )
df_PCc.plot(ax = ax, alpha = 0.4, legend = False)
plt.title('Compoarison of modeled and manufacturer power curves')
plt.ylabel('Power (kW)')
#Estimation of annual production from manuafturer and modeled power curves
df_Pc = pd.DataFrame(index = df_NG2.index)
df_Pm = pd.DataFrame(index = df_NG2.index)
for turbine in df_TDt.index:
#print(turbine)
df_Pc[turbine] = np.interp(df_NG2.Ws_avg.values, df_PCc.index.values, df_PCc[turbine].values)
df_Pm[turbine] = np.interp(df_NG2.Ws_avg.values, df_PCc.index.values, df_PCm[turbine].values)
df_P = pd.concat([df_Pc, df_Pm], axis = 1, keys = ['Manufacturer','Modeled'])
# Production from manufacturer or modeled power curve as well as measured production.
df_HB = pd.concat([df_NG2['P_avg'], df_P.swaplevel(axis = 1)['REpower.MM82.2000'] ], axis = 1)
df_HB = df_HB['2013':]
df_HB['P measured'] = df_HB.P_avg
del df_HB['P_avg']
df_HB.head()
fig, ax = plt.subplots(figsize = (9, 6))
df_HB['2015-01'][['P measured']].plot(ax = ax, alpha = 1)
df_HB['2015-01'][['Manufacturer','Modeled']].plot(ax = ax, alpha = 0.5)
mpld3.display(fig)
#Calcul of mean absolute error
df_HB['MAE Manufacturer'] = df_HB['P measured'] - df_HB['Manufacturer']
df_HB['MAE Manufacturer'] = df_HB['MAE Manufacturer'].abs()
df_HB['MAE Modeled'] = df_HB['P measured'] - df_HB['Modeled']
df_HB['MAE Modeled'] = df_HB['MAE Modeled'].abs()
df_HB[['MAE Manufacturer','MAE Modeled']].resample('a').mean()
PA = df_HB[['P measured', 'Manufacturer', 'Modeled']].resample('a').mean() / 2050
PA
PA.plot(kind = 'bar')
plt.title('Load factor from measured production \n or assessed from manufacturer and modeled power curve')
plt.ylabel('Load factor')
PA.divide(PA['P measured'], axis = 0)
#Estimation of annual production
import matplotlib.dates as mdates
YP = df_P['2013':].swaplevel(axis = 1).resample('a').sum()
YP.index = [2013, 2014, 2015, 2016]
for turbine in YP.columns.levels[0][0:]:
#print(turbine)
fig, ax = plt.subplots(figsize = (6,4))
YP[turbine].plot(kind = 'bar', ax = ax, rot = 0)
plt.legend(loc = 3)
plt.title('%s' %turbine)
plt.ylabel('Yearly estimated production \n kWh/year')
#Comparison of production from manufacturer and modeled power curve
YP = df_P['2016'].resample('a').sum()
A = (YP['Modeled']/YP['Manufacturer']).T
A.columns = ['Modeled / Manufacturer']
display(A.describe())
A.hist()
B = YP['Manufacturer'].T
B.columns = ['Manufacturer']
B['Modeled'] = YP['Modeled'].T
B['Modeled / Manufacturer'] = (B['Modeled']/B['Manufacturer'])
B = pd.concat([B, df_TDt], axis = 1)
B.head()
#Comparison without small wind turbine
B[B['Rated power'] > 500][['Modeled / Manufacturer']].boxplot()
B.plot(kind = 'scatter', x = 'Manufacturer', y = 'Modeled' )
plt.plot([0, 1e7], [0, 1e7])
A.boxplot()
A.plot(kind = 'hist', alpha = 0.4, bins = 20)